package com.zxg.util;

import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small scraping helpers for Taobao/Tmall shop pages, built on jsoup.
 *
 * <p>All members are static; the class holds no state.
 */
public class CrawlUtils {

  // Taobao variant of the same endpoint, kept as a reference for manual runs:
  // https://misskkl.taobao.com/i/asynSearch.htm?_ksTS=1495172990363_126&callback=jsonp126&mid=w-16675095367-0&wid=16675095367&path=/search.htm&search=y&spm=a1z10.1-c-s.0.0.723b056bsJvhLx&orderType=hotsell_desc

  /** Search URL fetched by {@link #getItemList()} when the caller supplies none. */
  private static final String DEFAULT_SEARCH_URL =
      "https://langditu.tmall.com/i/asynSearch.htm?_ksTS=1495172990363_126&callback=jsonp126&mid=w-14596445944-0&wid=14596445944&path=/search.htm&search=y&spm=a1z10.3-b-s.w4011-14596445944.78.6523268flAUexS&scene=taobao_shop&orderType=hotsell_desc&tsearch=y";

  /**
   * Literal sequence (a backslash followed by {@code &quot;}) stripped from the fetched markup
   * before it is re-parsed. Presumably the response escapes attribute quotes this way, which
   * would stop the CSS selectors below from matching -- TODO confirm against a live response.
   */
  private static final String ESCAPED_QUOTE = "\\&quot;";

  /**
   * Demo entry point: prints the base URL of two sample shops, then fetches and prints the
   * item list of the default shop.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String taobaoUrl = "https://misskkl.taobao.com/?spm=a1z10.5-c-s.0.0.4d3ce53dTBK26M";
    String tmallUrl = "https://esey.tmall.com/?spm=a220o.1000855.1997427721.d4918089.4732a2bdIbJe5X";

    System.out.println(shopBaseUrl(taobaoUrl, "taobao.com"));
    System.out.println(shopBaseUrl(tmallUrl, "tmall.com"));

    try {
      getItemList();
    } catch (IOException e) {
      // Nothing to recover in a demo run; report what failed and keep the trace for diagnosis.
      System.err.println("Failed to fetch item list: " + e.getMessage());
      e.printStackTrace();
    }
  }

  /**
   * Cuts {@code url} right after the first occurrence of {@code domain}, dropping any path and
   * query string that follow it.
   *
   * @param url full shop URL
   * @param domain domain suffix to cut after, e.g. {@code "taobao.com"}
   * @return the prefix of {@code url} ending with {@code domain}, or {@code url} unchanged when
   *     it does not contain {@code domain}
   */
  private static String shopBaseUrl(String url, String domain) {
    int start = url.indexOf(domain);
    if (start < 0) {
      // Without this guard, substring would return an arbitrary prefix of the URL.
      return url;
    }
    return url.substring(0, start + domain.length());
  }

  /**
   * Fetches the default shop search page and prints its items to standard output.
   *
   * @throws IOException if the page cannot be fetched
   * @see #getItemList(String)
   */
  public static void getItemList() throws IOException {
    getItemList(DEFAULT_SEARCH_URL);
  }

  /**
   * Fetches {@code url}, prints the cleaned markup, the number of {@code dl.item} elements
   * found, and for each of them its image URL, title, price, item id and item link.
   *
   * @param url address of a shop search page to fetch
   * @throws IOException if the page cannot be fetched
   */
  public static void getItemList(String url) throws IOException {
    Document doc = Jsoup.connect(url).get();
    String source = doc.html().replace(ESCAPED_QUOTE, "");

    // Element#html(String) replaces the document's content in place; the selectors below run
    // against this re-parsed, cleaned markup, so this call must happen before select().
    doc.html(source);
    System.out.println(doc);

    Elements elements = doc.select("dl.item");
    System.out.println(elements.size());

    for (Element element : elements) {
      // The thumbnail element carries both the lazy-load image URL and the title (alt text).
      Elements image = element.select("dt.photo a img");
      String imgUrl = image.attr("data-ks-lazyload");
      String title = image.attr("alt");
      String price = element.select("span.c-price").text();
      String itemId = element.attr("data-id");
      String itemUrl = element.select("dt.photo a").attr("href");

      System.out.println("imgUrl ->" + imgUrl);
      System.out.println("title -> " + title);
      System.out.println("price -> " + price);
      System.out.println("itemId -> " + itemId);
      System.out.println("itemUrl -> " + itemUrl);
    }
  }

}
